This script summarizes and plots the duration-based activity data.

Load libraries and set theme

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.0     ✔ dplyr   1.0.5
## ✔ tidyr   1.1.3     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Make theme_bw() the default ggplot2 theme for the session; plots that add
# their own complete theme (e.g. theme_classic()) override it.
theme_set(theme_bw())

Prep data

Read in data and demographic information

# Read the activity-duration file and tidy column types.
# - The CSV has an unnamed first column. readr < 2.0 names it "X1" and
#   readr >= 2.0 names it "...1"; any_of() drops whichever one is present and
#   does not error when the other is missing.
# - tcds_min is renamed to tcds_min_seg.
# - activity gets an explicit level order, which the colour vectors and
#   tables further down rely on.
data_dur <- read_csv("./data_demo_lena_transcripts/elan_activity_dur.csv") %>%
  dplyr::select(-any_of(c("X1", "...1"))) %>%
  rename(tcds_min_seg = tcds_min) %>%
  mutate(
    id = factor(id),
    language = factor(language),
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "non-tcds")
    )
  )
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   activity = col_character(),
##   id = col_double(),
##   rectime = col_double(),
##   segment_num = col_double(),
##   language = col_character(),
##   dur_min = col_double(),
##   Dur10minval = col_double(),
##   AWCval = col_double(),
##   Time = col_double(),
##   Date = col_character(),
##   dur_10min = col_double(),
##   tcds_min = col_double()
## )
# Inspect column types and factor levels after import.
str(data_dur)
## tibble[,12] [1,654 × 12] (S3: tbl_df/tbl/data.frame)
##  $ activity    : Factor w/ 7 levels "books","play",..: 2 6 5 6 5 4 4 6 5 2 ...
##  $ id          : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
##  $ rectime     : num [1:1654] 15242 15242 15242 14342 14342 ...
##  $ segment_num : num [1:1654] 2 2 2 3 3 3 4 4 4 5 ...
##  $ language    : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dur_min     : num [1:1654] 3.05 3.38 2.51 6.3 2.03 ...
##  $ Dur10minval : num [1:1654] 600 600 600 600 600 600 512 512 512 538 ...
##  $ AWCval      : num [1:1654] 728 728 728 681 681 681 602 602 602 596 ...
##  $ Time        : num [1:1654] 20.4 20.4 20.4 20.2 20.2 ...
##  $ Date        : chr [1:1654] "2010.10.18" "2010.10.18" "2010.10.18" "2010.10.18" ...
##  $ dur_10min   : num [1:1654] 10 10 10 10 10 ...
##  $ tcds_min_seg: num [1:1654] 8.95 8.95 8.95 8.66 8.66 ...
# Split the data by language; the per-language tables and plots below use
# these two subsets.
data_dur_en <- dplyr::filter(data_dur, language == "english")
data_dur_sp <- dplyr::filter(data_dur, language == "spanish")

Check number of segments per participant

# Sanity check (English): count the distinct segments each participant has.
# Six are expected; sorting by n lists any participant with fewer first.
num_segments_english <- data_dur_en %>%
  distinct(id, segment_num) %>%
  count(id)

num_segments_english %>% arrange(n)
## # A tibble: 45 x 2
##    id        n
##    <fct> <int>
##  1 20001     6
##  2 20003     6
##  3 20004     6
##  4 20007     6
##  5 20034     6
##  6 20041     6
##  7 20048     6
##  8 20050     6
##  9 20056     6
## 10 20071     6
## # … with 35 more rows
# Sanity check (Spanish): count the distinct segments each participant has.
# Six are expected; sorting by n lists any participant with fewer first.
num_segments_spanish <- data_dur_sp %>%
  distinct(id, segment_num) %>%
  count(id)

num_segments_spanish %>% arrange(n)
## # A tibble: 45 x 2
##    id        n
##    <fct> <int>
##  1 7292      6
##  2 7352      6
##  3 7355      6
##  4 7363      6
##  5 7373      6
##  6 7409      6
##  7 7412      6
##  8 7433      6
##  9 7446      6
## 10 7448      6
## # … with 35 more rows

Proportion duration - RAW per 10 min

# Boxplot of raw activity durations (minutes), one panel per language, with
# the individual observations jittered on top. Fill colours follow the
# activity level order: books, play, food, routines, conv, ac, non-tcds.
ggplot(
  data = data_dur,
  mapping = aes(x = activity, y = dur_min, fill = activity)
) +
  geom_boxplot() +
  geom_jitter(alpha = 0.3) +
  facet_wrap(vars(language)) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "black"
    )
  ) +
  labs(x = "Activity", y = "Duration (min)") +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 20, hjust = 0.7),
    text = element_text(size = 30),
    panel.spacing = unit(4, "lines")
  )

# ggsave() writes the most recently created plot, i.e. the boxplot above.
ggsave(
  filename = "./figures/boxplot_duration.pdf",
  width = 18,
  height = 8,
  units = "in",
  dpi = 300
)


# English: mean duration (min) of each activity per participant.
# mutate() + distinct() collapses to one row per id x activity and keeps the
# language column alongside.
data_en_act <- data_dur_en %>%
  group_by(id, activity) %>%
  mutate(dur_min_act = mean(dur_min)) %>%
  distinct(id, language, activity, dur_min_act) %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "non-tcds")
    )
  )

# Range of the raw (unaveraged) durations in the English group.
# NOTE(review): this groups by participant *and* activity, whereas the Spanish
# counterpart further down groups by activity only -- confirm which is
# intended before comparing the two tables.
data_dur_en %>%
  group_by(id, activity) %>%
  mutate(
    min = min(dur_min),
    max = max(dur_min)
  ) %>%
  distinct(activity, min, max)
## # A tibble: 257 x 4
## # Groups:   id, activity [257]
##    activity id       min   max
##    <fct>    <fct>  <dbl> <dbl>
##  1 play     20001 3.05    6.86
##  2 ac       20001 0.889   6.30
##  3 conv     20001 0.246   3.56
##  4 routines 20001 0.337   2.38
##  5 play     20003 2.54    2.90
##  6 routines 20003 5.61    7.81
##  7 ac       20003 0.0188  1.45
##  8 books    20003 3.55    6.57
##  9 food     20003 0.0118  3.31
## 10 play     20004 0.238   3.68
## # … with 247 more rows
# Spanish: mean duration (min) of each activity per participant.
# mutate() + distinct() collapses to one row per id x activity and keeps the
# language column alongside.
data_sp_act <- data_dur_sp %>%
  group_by(id, activity) %>%
  mutate(dur_min_act = mean(dur_min)) %>%
  distinct(id, language, activity, dur_min_act) %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "non-tcds")
    )
  )

# Range of the raw (unaveraged) durations in the Spanish group, per activity.
data_dur_sp %>%
  group_by(activity) %>%
  mutate(
    min = min(dur_min),
    max = max(dur_min)
  ) %>%
  distinct(activity, min, max)
## # A tibble: 7 x 3
## # Groups:   activity [7]
##   activity     min   max
##   <fct>      <dbl> <dbl>
## 1 ac       0.00407  9.93
## 2 play     0.140    9.97
## 3 conv     0.100   10   
## 4 food     0.421    9.99
## 5 routines 0.0818  10   
## 6 books    1.15    10   
## 7 non-tcds 0.00368  8.98
# Descriptives of the per-participant mean durations, by activity (English).
# Note: min and max here are of the participant-level averages, not of the
# raw segment-level data.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
describeBy(
  data_en_act$dur_min_act,
  data_en_act$activity,
  mat = TRUE,
  fast = TRUE
)
##     item   group1 vars  n     mean       sd         min      max    range
## X11    1    books    1 22 4.898482 2.018585 1.845591667 8.026042 6.180450
## X12    2     play    1 39 3.854464 1.728633 0.817550000 7.480950 6.663400
## X13    3     food    1 31 2.772894 2.225333 0.062700000 9.857617 9.794917
## X14    4 routines    1 32 2.182612 1.587954 0.044533333 6.448958 6.404425
## X15    5     conv    1 43 2.370739 1.522432 0.109333333 6.675067 6.565733
## X16    6       ac    1 45 1.797001 1.165076 0.007483333 4.802520 4.795037
## X17    7 non-tcds    1 45 3.655110 1.631661 0.880413889 7.118992 6.238578
##            se
## X11 0.4303637
## X12 0.2768027
## X13 0.3996816
## X14 0.2807133
## X15 0.2321687
## X16 0.1736792
## X17 0.2432337
# Descriptives of the per-participant mean durations, by activity (Spanish).
# Note: min and max here are of the participant-level averages, not of the
# raw segment-level data.
describeBy(
  data_sp_act$dur_min_act,
  data_sp_act$activity,
  mat = TRUE,
  fast = TRUE
)
##     item   group1 vars  n     mean       sd        min      max    range
## X11    1    books    1 20 6.182757 2.219610 1.14918333 8.936558 7.787375
## X12    2     play    1 37 3.806485 2.551134 0.21073333 9.810133 9.599400
## X13    3     food    1 31 4.084679 2.588315 0.49641667 9.967417 9.471000
## X14    4 routines    1 35 2.849555 1.753518 0.15326667 7.072858 6.919592
## X15    5     conv    1 43 2.829679 1.963876 0.16178333 8.328506 8.166722
## X16    6       ac    1 45 2.721287 1.434980 0.44101250 6.044525 5.603512
## X17    7 non-tcds    1 45 3.206051 1.717209 0.08946667 7.312114 7.222647
##            se
## X11 0.4963199
## X12 0.4194038
## X13 0.4648751
## X14 0.2963986
## X15 0.2994883
## X16 0.2139141
## X17 0.2559864

Proportion duration - summed across segments

# Total minutes per activity for each participant (summed over all of that
# participant's rows), plus dur_hour = the participant's grand total across
# activities. The result is left grouped by id.
data_dur_sum <- data_dur %>%
  group_by(id, activity) %>%
  mutate(dur_act_total = sum(dur_min)) %>%
  distinct(id, language, activity, dur_act_total) %>%
  ungroup() %>%
  group_by(id) %>%
  mutate(dur_hour = sum(dur_act_total))

# Boxplot of the summed durations, one panel per language.
ggplot(
  data = data_dur_sum,
  mapping = aes(x = activity, y = dur_act_total, fill = activity)
) +
  geom_boxplot() +
  geom_jitter(alpha = 0.3) +
  facet_wrap(vars(language)) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "black"
    )
  ) +
  labs(x = "Activity", y = "Sum Duration Across Segments (min)") +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 20, hjust = 0.7),
    text = element_text(size = 30),
    panel.spacing = unit(4, "lines")
  )

# Per-language subsets for the descriptive tables.
data_dur_sum_en <- dplyr::filter(data_dur_sum, language == "english")
data_dur_sum_sp <- dplyr::filter(data_dur_sum, language == "spanish")


# Descriptives of the per-participant summed durations, by activity (English).
# Note: min and max here are of each participant's total minutes per activity
# (dur_act_total), not of the raw segment-level data.
describeBy(
  data_dur_sum_en$dur_act_total,
  data_dur_sum_en$activity,
  mat = TRUE,
  fast = TRUE
)
##     item   group1 vars  n      mean       sd         min      max    range
## X11    1    books    1 22 11.452584 9.196898 2.089166667 39.92383 37.83467
## X12    2     play    1 39 10.512915 7.987952 1.101933333 33.71398 32.61205
## X13    3     food    1 31  5.786824 5.328664 0.062700000 20.17702 20.11432
## X14    4 routines    1 32  4.840254 5.070568 0.044533333 25.63767 25.59313
## X15    5     conv    1 43  8.031319 6.026471 0.109333333 24.31743 24.20810
## X16    6       ac    1 45  7.800962 6.645371 0.007483333 25.96090 25.95342
## X17    7 non-tcds    1 45 21.854143 9.813560 5.282483333 42.71395 37.43147
##            se
## X11 1.9607853
## X12 1.2790960
## X13 0.9570564
## X14 0.8963582
## X15 0.9190282
## X16 0.9906334
## X17 1.4629191
# Descriptives of the per-participant summed durations, by activity (Spanish).
# Note: min and max here are of each participant's total minutes per activity
# (dur_act_total), not of the raw segment-level data.
describeBy(
  data_dur_sum_sp$dur_act_total,
  data_dur_sum_sp$activity,
  mat = TRUE,
  fast = TRUE
)
##     item   group1 vars  n      mean        sd       min      max    range
## X11    1    books    1 20 12.162309 10.363463 1.1491833 45.30968 44.16050
## X12    2     play    1 37  8.598127  7.822008 0.2107333 27.69682 27.48608
## X13    3     food    1 31  7.321418  5.671825 0.4964167 20.78387 20.28745
## X14    4 routines    1 35  4.911878  3.879373 0.1532667 14.14572 13.99245
## X15    5     conv    1 43  8.231171  6.919189 0.1617833 24.98552 24.82373
## X16    6       ac    1 45 11.152151  7.595329 1.0423667 30.58165 29.53928
## X17    7 non-tcds    1 45 19.189398 10.347160 0.3578667 43.87268 43.51482
##            se
## X11 2.3173409
## X12 1.2859302
## X13 1.0186899
## X14 0.6557337
## X15 1.0551665
## X16 1.1322447
## X17 1.5424636

Proportion duration - other child-centered

# Collapse the activity coding to four categories -- books, ac, non-tcds, and
# "othercc" for everything else -- then total the minutes per participant and
# category. dur_hour is the participant's grand total across categories.
data_dur_othercc <- data_dur %>%
  mutate(
    activity2 = case_when(
      is.na(activity) ~ NA_character_, # keep missing codes missing
      activity == "books" ~ "books",
      activity == "ac" ~ "ac",
      activity == "non-tcds" ~ "non-tcds",
      TRUE ~ "othercc"
    )
  ) %>%
  group_by(id, activity2) %>%
  mutate(dur_act_total = sum(dur_min)) %>%
  distinct(id, language, activity2, dur_act_total) %>%
  ungroup() %>%
  group_by(id) %>%
  mutate(dur_hour = sum(dur_act_total))

# Per-language subsets for the descriptive tables.
data_dur_othercc_en <- dplyr::filter(data_dur_othercc, language == "english")
data_dur_othercc_sp <- dplyr::filter(data_dur_othercc, language == "spanish")


# Descriptives of the per-participant summed durations for the collapsed
# categories (English).
# Note: min and max here are of each participant's total minutes per category
# (dur_act_total), not of the raw segment-level data.
describeBy(
  data_dur_othercc_en$dur_act_total,
  data_dur_othercc_en$activity2,
  mat = TRUE,
  fast = TRUE
)
##     item   group1 vars  n      mean        sd         min      max    range
## X11    1       ac    1 45  7.800962  6.645371 0.007483333 25.96090 25.95342
## X12    2    books    1 22 11.452584  9.196898 2.089166667 39.92383 37.83467
## X13    3 non-tcds    1 45 21.854143  9.813560 5.282483333 42.71395 37.43147
## X14    4  othercc    1 45 24.214002 11.451546 1.384766667 51.27485 49.89008
##            se
## X11 0.9906334
## X12 1.9607853
## X13 1.4629191
## X14 1.7070957
# Descriptives of the per-participant summed durations for the collapsed
# categories (Spanish).
# Note: min and max here are of each participant's total minutes per category
# (dur_act_total), not of the raw segment-level data.
describeBy(
  data_dur_othercc_sp$dur_act_total,
  data_dur_othercc_sp$activity2,
  mat = TRUE,
  fast = TRUE
)
##     item   group1 vars  n     mean        sd       min      max    range
## X11    1       ac    1 45 11.15215  7.595329 1.0423667 30.58165 29.53928
## X12    2    books    1 20 12.16231 10.363463 1.1491833 45.30968 44.16050
## X13    3 non-tcds    1 45 19.18940 10.347160 0.3578667 43.87268 43.51482
## X14    4  othercc    1 45 23.79891 11.905835 0.4964167 47.55308 47.05667
##           se
## X11 1.132245
## X12 2.317341
## X13 1.542464
## X14 1.774817

Plotting activity by time of day

For publication

# Publication copy of the data: display labels for activity and language,
# and the language column renamed so the legend title reads "Language".
data_dur_pub <- data_dur %>%
  mutate(
    activity = factor(
      recode(
        activity,
        "books" = "Books",
        "play" = "Playing",
        "food" = "Feeding",
        "routines" = "Routines",
        "conv" = "Unst. Conv.",
        "ac" = "Adult-cent.",
        "non-tcds" = "non-tCDS"
      ),
      levels = c(
        "Books", "Playing", "Feeding", "Routines",
        "Unst. Conv.", "Adult-cent.", "non-tCDS"
      )
    ),
    language = recode(language, "english" = "English", "spanish" = "Spanish")
  ) %>%
  rename(Language = language)

# combined groups
# Jittered scatter of every coded activity instance: Time on x (presumably
# hour of day in decimal hours -- confirm against the data dictionary),
# activity on y (reversed so the first level is at the top), colour by
# activity, point shape by language.
ggplot(data_dur_pub, aes(Time, fct_rev(activity), color = activity, shape = Language)) +
  geom_jitter(size = 10, alpha = .5, stroke = 2) +
  # vertical reference lines every four hours
  geom_vline(xintercept = c(8, 12, 16, 20)) +
  scale_x_continuous(breaks = seq(5, 24, by = 1)) +
  scale_color_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "lightgrey"
    ),
    guide = "none"
  ) +
  scale_shape_manual(values = c(16, 21)) +
  labs(x = "Time", y = "") +
  theme(
    text = element_text(size = 40),
    # legend in the top-right corner of the panel
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    # margin() takes the four sides as separate arguments (t, r, b, l).
    # Passing one length-4 vector, as before, put all four numbers into `t`
    # and produced a 7-element unit instead of a 4-sided margin.
    legend.box.margin = margin(10, 10, 10, 10),
    legend.background = element_rect(fill = "white"), # , color = "black"
    legend.text = element_text(size = 18),
    legend.title = element_text(size = 21),
    legend.key.size = unit(1, "cm")
  )

  # theme(axis.title.y = element_text(angle = 0,  vjust = 0.5))

# ggsave() writes the most recently created plot, i.e. the scatter above.
ggsave("./figures/time_activity_by_dur.pdf", width = 22, height = 11, dpi = 300)

Pie charts of the proportion of time spent in each activity per hour, averaged across families (time is summed across the six segments)

Create dfs for individual participants

# Total recorded duration per participant across their segments.
# The first distinct() keeps one row per segment so each Dur10minval is
# counted once; dividing by 60 presumably converts seconds to minutes
# (a full segment is 600 in Dur10minval and 10 in dur_10min).
total_dur_hour <- data_dur %>%
  distinct(id, segment_num, rectime, Dur10minval, language) %>%
  group_by(id) %>%
  mutate(dur_six_segments = sum(Dur10minval) / 60) %>%
  distinct(id, dur_six_segments, language)


# Minutes per activity for each participant (summed over their segments),
# joined with the participant's total recorded duration. Rows coded
# "non-tcds" are dropped here.
variables_dur_all <- data_dur %>%
  filter(activity != "non-tcds") %>%
  dplyr::select(-dur_10min, -tcds_min_seg) %>%
  full_join(total_dur_hour, by = c("id", "language")) %>%
  group_by(id, activity) %>%
  mutate(dur_activity_1hr = sum(dur_min)) %>%
  distinct(
    id, activity, dur_activity_1hr, dur_six_segments, language
  )


# make wide, add zeros, make long, create proportion variable
# Steps:
#   1. spread(): one column per activity, holding that activity's minutes.
#   2. replace_na(): an activity a participant never did gets 0 minutes
#      rather than NA.
#   3. nontcds is computed as the remainder: total duration of the six
#      segments minus the minutes in the six coded activities.
#   4. gather(): back to long format, one row per participant x activity.
#      Note the category is spelled "nontcds" here (no hyphen), unlike the
#      "non-tcds" level used in data_dur.
#   5. prop_dur_activity_1hr = activity minutes / total minutes.
#   6. factor(activity) is called without `levels`, so the levels are the
#      sorted unique values (ac, books, conv, food, nontcds, play, routines).
data_dur_prop_all <- variables_dur_all %>% 
  spread(activity, dur_activity_1hr) %>% 
  replace_na(list(play = 0, food = 0, conv = 0, books = 0, routines = 0, ac = 0)) %>% 
  mutate(nontcds = dur_six_segments - (play + food + conv + books + routines + ac)) %>% 
  gather(activity, value = dur_activity_1hr_zeros, 
         play, food, conv, books, routines, ac, nontcds) %>% 
  mutate(prop_dur_activity_1hr = dur_activity_1hr_zeros / dur_six_segments) %>% 
  mutate(activity = factor(activity))


# create wide df for min per activity
# One row per participant with a minutes column for each activity, plus:
#   dur_min_total   - all seven categories (should equal dur_six_segments)
#   dur_min_childcc - books + conv + food + play + routines
#   dur_min_tcds    - the child-centred activities plus ac
data_dur_min_all_wide <- data_dur_prop_all %>%
  dplyr::select(-prop_dur_activity_1hr) %>%
  spread(activity, dur_activity_1hr_zeros) %>%
  mutate(
    dur_min_total = ac + books + conv + food + nontcds + play + routines,
    dur_min_childcc = books + conv + food + play + routines,
    dur_min_tcds = ac + books + conv + food + play + routines
  )

# Descriptives by language. id and language are factors, so describeBy()
# warns about them and reports NaN/Inf in those two rows; the numeric rows
# are unaffected.
psych::describeBy(
  data_dur_min_all_wide,
  data_dur_min_all_wide$language,
  fast = TRUE
)
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
## 
##  Descriptive statistics by group 
## group: english
##                  vars  n  mean    sd   min   max range   se
## id                  1 45   NaN    NA   Inf  -Inf  -Inf   NA
## language            2 45   NaN    NA   Inf  -Inf  -Inf   NA
## dur_six_segments    3 45 59.47  1.09 55.23 60.00  4.77 0.16
## ac                  4 45  7.80  6.65  0.01 25.96 25.95 0.99
## books               5 45  5.60  8.60  0.00 39.92 39.92 1.28
## conv                6 45  7.67  6.12  0.00 24.32 24.32 0.91
## food                7 45  3.99  5.17  0.00 20.18 20.18 0.77
## nontcds             8 45 21.85  9.81  5.28 42.71 37.43 1.46
## play                9 45  9.11  8.26  0.00 33.71 33.71 1.23
## routines           10 45  3.44  4.80  0.00 25.64 25.64 0.72
## dur_min_total      11 45 59.47  1.09 55.23 60.00  4.77 0.16
## dur_min_childcc    12 45 29.81 12.75  1.38 54.71 53.33 1.90
## dur_min_tcds       13 45 37.61  9.66 17.29 54.72 37.43 1.44
## ------------------------------------------------------------ 
## group: spanish
##                  vars  n  mean    sd   min   max range   se
## id                  1 45   NaN    NA   Inf  -Inf  -Inf   NA
## language            2 45   NaN    NA   Inf  -Inf  -Inf   NA
## dur_six_segments    3 45 59.55  1.20 53.55 60.00  6.45 0.18
## ac                  4 45 11.15  7.60  1.04 30.58 29.54 1.13
## books               5 45  5.41  9.15  0.00 45.31 45.31 1.36
## conv                6 45  7.87  6.97  0.00 24.99 24.99 1.04
## food                7 45  5.04  5.80  0.00 20.78 20.78 0.87
## nontcds             8 45 19.19 10.35  0.36 43.87 43.51 1.54
## play                9 45  7.07  7.82  0.00 27.70 27.70 1.17
## routines           10 45  3.82  3.99  0.00 14.15 14.15 0.59
## dur_min_total      11 45 59.55  1.20 53.55 60.00  6.45 0.18
## dur_min_childcc    12 45 29.20 14.16  0.50 52.37 51.87 2.11
## dur_min_tcds       13 45 40.36 10.69 16.13 59.64 43.51 1.59
# create wide df for prop min per activity
# One row per participant with a proportion column for each activity, plus:
#   prop_total   - all seven categories (should be 1)
#   prop_childcc - books + conv + food + play + routines
#   prop_tcds    - the child-centred activities plus ac
data_dur_prop_all_wide <- data_dur_prop_all %>%
  dplyr::select(-dur_activity_1hr_zeros) %>%
  spread(activity, prop_dur_activity_1hr) %>%
  mutate(
    prop_total = ac + books + conv + food + nontcds + play + routines,
    prop_childcc = books + conv + food + play + routines,
    prop_tcds = ac + books + conv + food + play + routines
  )

# descriptives for proportion of min per activity
# id and language are factors, so describeBy() warns about them and reports
# NaN/Inf in those two rows; the numeric rows are unaffected.
psych::describeBy(
  data_dur_prop_all_wide,
  data_dur_prop_all_wide$language,
  fast = TRUE
)
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
## 
##  Descriptive statistics by group 
## group: english
##                  vars  n  mean   sd   min   max range   se
## id                  1 45   NaN   NA   Inf  -Inf  -Inf   NA
## language            2 45   NaN   NA   Inf  -Inf  -Inf   NA
## dur_six_segments    3 45 59.47 1.09 55.23 60.00  4.77 0.16
## ac                  4 45  0.13 0.11  0.00  0.44  0.44 0.02
## books               5 45  0.09 0.14  0.00  0.67  0.67 0.02
## conv                6 45  0.13 0.10  0.00  0.41  0.41 0.02
## food                7 45  0.07 0.09  0.00  0.34  0.34 0.01
## nontcds             8 45  0.37 0.16  0.09  0.71  0.62 0.02
## play                9 45  0.15 0.14  0.00  0.59  0.59 0.02
## routines           10 45  0.06 0.08  0.00  0.46  0.46 0.01
## prop_total         11 45  1.00 0.00  1.00  1.00  0.00 0.00
## prop_childcc       12 45  0.50 0.21  0.02  0.91  0.89 0.03
## prop_tcds          13 45  0.63 0.16  0.29  0.91  0.62 0.02
## ------------------------------------------------------------ 
## group: spanish
##                  vars  n  mean   sd   min   max range   se
## id                  1 45   NaN   NA   Inf  -Inf  -Inf   NA
## language            2 45   NaN   NA   Inf  -Inf  -Inf   NA
## dur_six_segments    3 45 59.55 1.20 53.55 60.00  6.45 0.18
## ac                  4 45  0.19 0.13  0.02  0.51  0.49 0.02
## books               5 45  0.09 0.15  0.00  0.76  0.76 0.02
## conv                6 45  0.13 0.12  0.00  0.42  0.42 0.02
## food                7 45  0.08 0.10  0.00  0.35  0.35 0.01
## nontcds             8 45  0.32 0.18  0.01  0.73  0.73 0.03
## play                9 45  0.12 0.13  0.00  0.46  0.46 0.02
## routines           10 45  0.06 0.07  0.00  0.24  0.24 0.01
## prop_total         11 45  1.00 0.00  1.00  1.00  0.00 0.00
## prop_childcc       12 45  0.49 0.24  0.01  0.90  0.90 0.04
## prop_tcds          13 45  0.68 0.18  0.27  0.99  0.73 0.03

Create dfs for single mean values across families

# Mean proportion of the hour spent in each activity, per language, averaged
# across participants.
# The remainder category in data_dur_prop_all is spelled "nontcds" (no
# hyphen). The level list previously said "non-tcds", which matched nothing
# and turned that category into NA -- the `<NA>` column header in the table
# printed below (knitted before this fix) is that symptom. With the correct
# spelling the column is labelled nontcds and the category keeps its place as
# the seventh level.
prop_dur_mean_1hr_all <- data_dur_prop_all %>%
  group_by(activity, language) %>%
  dplyr::summarize(mean = mean(prop_dur_activity_1hr, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "nontcds")
    )
  )
## `summarise()` has grouped output by 'activity'. You can override using the
## `.groups` argument.
# One row per language, one column per activity.
prop_dur_mean_1hr_all %>% spread(activity, mean)
## # A tibble: 2 x 8
##   language  books  play   food routines  conv    ac `<NA>`
##   <fct>     <dbl> <dbl>  <dbl>    <dbl> <dbl> <dbl>  <dbl>
## 1 english  0.0939 0.153 0.0671   0.0583 0.129 0.131  0.367
## 2 spanish  0.0906 0.118 0.0845   0.0641 0.132 0.188  0.323

For publication

# Publication labels for the pie charts: capitalised language names and
# display names for three of the activity levels.
prop_dur_mean_1hr_all_pub <- prop_dur_mean_1hr_all %>%
  mutate(
    language = recode(language, "english" = "English", "spanish" = "Spanish"),
    activity = recode(
      activity,
      "ac" = "adult-cent.",
      "conv" = "unst. conv.",
      "nontcds" = "non-tcds"
    )
  )

# Pie charts (stacked bar in polar coordinates) of the mean proportion of the
# hour per activity, one pie per language, each slice labelled with its
# rounded percentage.
ggplot(
  data = prop_dur_mean_1hr_all_pub,
  mapping = aes(x = "", y = mean, fill = activity)
) +
  geom_col(width = 1) +
  geom_text(
    mapping = aes(label = paste0(round(mean * 100), "%")),
    position = position_stack(vjust = 0.5),
    size = 20
  ) +
  coord_polar(theta = "y", start = 0) +
  facet_wrap(vars(language)) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "grey92"
    )
  ) +
  # scale_y_continuous(breaks = y.breaks,   # where to place the labels
  #                    labels = prop_dur_mean_1hr_all_pub$activity) + # the labels
  labs(title = "", x = "", y = "") +
  theme_classic() +
  theme(
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    text = element_text(size = 60),
    legend.position = "none"
    # alternative: legend.direction = "horizontal", legend.position = "bottom"
  )

# ggsave() writes the most recently created plot, i.e. the pie charts above.
ggsave(
  filename = "./figures/piechart_dur_1hr.pdf",
  width = 24,
  height = 24,
  units = "in",
  dpi = 300
)

Additional figures (not in publication)

Bar plot of each family and proportion duration per hour - sorted by AWC in one hour

# Total AWCval per participant across their segments.
# The first distinct() keeps one row per segment so each segment's AWCval is
# counted once in the sum.
total_awc_hour <- data_dur %>%
  distinct(id, segment_num, rectime, AWCval, language) %>%
  group_by(id) %>%
  mutate(awc_total_tophr = sum(AWCval)) %>%
  distinct(
    id, awc_total_tophr, language
  )


# Per-language proportion tables with each participant's AWC total attached
# (used to order the bars in the plots below). Activity is re-levelled from
# alphabetical order to the plotting order.
data_dur_prop_en <- data_dur_prop_all %>%
  full_join(total_awc_hour, by = c("id", "language")) %>%
  filter(language == "english") %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "nontcds")
    )
  )

data_dur_prop_sp <- data_dur_prop_all %>%
  full_join(total_awc_hour, by = c("id", "language")) %>%
  filter(language == "spanish") %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "nontcds")
    )
  )


# plots
# Stacked bar of the proportion of the hour spent in each activity, one bar
# per family, ordered left to right by total AWC (awc_total_tophr).
#   data        - a per-language table such as data_dur_prop_en
#   fill_values - colours for the activity levels (named by level)
# Returns the ggplot object; calling it at top level prints the plot.
plot_prop_by_family <- function(data, fill_values) {
  ggplot(
    data,
    aes(
      x = reorder(id, awc_total_tophr),
      y = prop_dur_activity_1hr,
      fill = activity
    )
  ) +
    geom_col() +
    scale_fill_manual(values = fill_values) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
}

# Same activity -> colour mapping as the boxplots, time-of-day plot and pie
# charts (food = green2, routines = dodgerblue1). These two colours were
# previously swapped in this section only. Naming the vector ties each colour
# to its level instead of relying on position.
prop_bar_colors <- c(
  books = "darkviolet",
  play = "firebrick1",
  food = "green2",
  routines = "dodgerblue1",
  conv = "darkgoldenrod1",
  ac = "darkgrey",
  nontcds = "black"
)

plot_prop_by_family(data_dur_prop_en, prop_bar_colors)

plot_prop_by_family(data_dur_prop_sp, prop_bar_colors)

# plots - highlighting book reading
# Books keep their colour and nontcds stays black; everything else is grey.
prop_bar_colors_books <- c(
  books = "darkviolet",
  play = "grey",
  food = "grey",
  routines = "grey",
  conv = "grey",
  ac = "grey",
  nontcds = "black"
)

plot_prop_by_family(data_dur_prop_en, prop_bar_colors_books)

plot_prop_by_family(data_dur_prop_sp, prop_bar_colors_books)

Instance of each activity per participant

# Dot plot showing which activities each participant has at least one coded
# instance of (one column per participant, one row per activity).
# Colours use the same activity -> colour mapping as the boxplots and the
# time-of-day plot (food = green2, routines = dodgerblue1); these two were
# previously swapped here. Naming the vector ties each colour to its level.
instance_colors <- c(
  "books" = "darkviolet",
  "play" = "firebrick1",
  "food" = "green2",
  "routines" = "dodgerblue1",
  "conv" = "darkgoldenrod1",
  "ac" = "darkgrey",
  "non-tcds" = "black"
)

# English
ggplot(data_dur_en, aes(id, fct_rev(activity), color = activity)) +
  geom_point(size = 8) +
  scale_color_manual(values = instance_colors) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Spanish
ggplot(data_dur_sp, aes(id, fct_rev(activity), color = activity)) +
  geom_point(size = 8) +
  scale_color_manual(values = instance_colors) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))